#coding='utf-8'
from requests import Request, Session
import time
from imp import reload
from urllib import request as urllib2
from lxml import etree
import pymysql
import json
import sys
reload(sys)
import requests

# Request headers sent with every fetch. The Cookie is a hard-coded,
# captured Hupu session (login + anti-bot tokens) — it will expire and
# must be refreshed manually from a logged-in browser session.
headers = {
    'Cookie': '__gads=ID=624dc9dbef8c62a8:T=1525230822:S=ALNI_MZPn2PMWldnTCzH_6SijFSSeBMcYw; _HUPUSSOID=ec925b63-d86b-4a5b-8dd1-38c2c9238a28; __dacevid3=0xd3317e571f1efd2c; AUM=dghSu4KWPUvBrPzOadc9n2lXjN5nt-yqlWtWpTno5j7lw; PHPSESSID=4ttr22ush7pnskgasto4u4vsp3; _fmdata=Nr66dV82IpuVx0vB6U2ZT4lHqH72dUMRRXh59tX0YwgnHyVXx1AzWuVtmcgeJoAsf9PEZu8ffxv72zRtMO8COnl8EZtUihyUuln57C40QEw%3D; _cnzz_CV30020080=buzi_cookie%7Cb85e7b69.22c0.b705.576f.18cfffd5023f%7C-1; _dacevid3=b85e7b69.22c0.b705.576f.18cfffd5023f; _CLT=918ebe7bb324d8673460f7af1d701a5c; u=31333267|5b6Q5rW35Lic5ZWK|ef0e|4a52c279144703997f3ad577d4ad0a51|144703997f3ad577|aHVwdV83YWM2YTkzNDVmNmExZjZk; ua=22446340; us=c58b544c3fc2ebf6a7600b4d10d39d77a2ebf31e221c3b9df7d2e4efd482dadc7839edf15e963cbad20142384565f9cc5e85c54536a52fd270518474ab514cd1; __dacevst=042292e3.5363c98e|1526353025159',
    'Host': 'bbs.hupu.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36',
}
def get_one_page(url):
    """Fetch *url* with the module-level Hupu headers.

    Returns the response body as text on HTTP 200, otherwise None
    (non-200 status, timeout, or any network error).
    """
    # Context manager ensures the session's connection pool is released
    # even if the request raises (original leaked the Session object).
    with Session() as s:
        req = Request('GET', url, headers=headers)
        prepped = s.prepare_request(req)
        try:
            # Original had no timeout: one stalled server would hang the
            # whole crawl loop forever.
            r = s.send(prepped, timeout=10)
        except requests.RequestException:
            # Treat network failures like any other failed fetch so the
            # caller's None-check handles them uniformly.
            return None
        if r.status_code == 200:
            print(r.status_code)
            return r.text
    return None
def main(Page):
    """Scrape listing page *Page* of the Hupu vote board and insert each
    thread's title, author and publish time into MySQL.

    Only rows whose publish time falls strictly between the hard-coded
    startTime/endTime window are stored.
    """
    url = 'https://bbs.hupu.com/vote-' + str(Page)
    # url = 'https://bbs.hupu.com/cavaliers-' + str(Page)  # Hupu Cavaliers board URL
    html = get_one_page(url)
    # Check BEFORE parsing: the original called etree.HTML(html) first,
    # so a failed fetch (html is None) raised instead of returning.
    if html is None:
        return
    dom = etree.HTML(html)
    if dom is None:
        return
    node_list = dom.xpath('//ul[@class="for-list"]/li')

    # Hard-coded scrape window (inclusive bounds excluded by the strict
    # comparison below, matching the original behavior).
    startTime = '2018-05-20'
    endTime = '2018-05-23'

    # Connect to the database; try/finally guarantees the connection is
    # closed even when an insert raises (original leaked it).
    db = pymysql.connect(host='localhost', user='root', password='', port=3306, db='spiders', charset='utf8')
    try:
        cursor = db.cursor()
        sql = 'INSERT INTO hupu_one(title,userName,publishTime) values(%s, %s, %s)'
        for node in node_list:
            # xpath() returns lists of text fragments; join them once so
            # both the date comparison and the INSERT see plain strings
            # (the original passed raw lists to cursor.execute).
            title = ''.join(node.xpath('./div[@class="titlelink box"]/a[@class="truetit"]//text()')).strip()
            userName = ''.join(node.xpath('./div[@class="author box"]/a[@class="aulink"]//text()')).strip()
            publishTime = ''.join(node.xpath('./div[@class="author box"]/a[@style="color:#808080;cursor: initial; "]/text()')).strip()

            # ISO-formatted dates compare correctly as strings.
            if startTime < publishTime < endTime:
                items = {
                    'title': title,
                    'userName': userName,
                    'time': publishTime
                }
                cursor.execute(sql, (title, userName, publishTime))
                db.commit()
                print(items)
    finally:
        db.close()

if __name__ == '__main__':
    # Crawl listing pages 1..35, pausing between requests to avoid
    # hammering the server.
    for page_no in range(1, 36):
        main(Page=page_no)
        time.sleep(3.5)

